import numpy
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = [10, 5]
import pandas
import math
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_absolute_percentage_error
import time
from time import gmtime, strftime
from datetime import datetime
def _load_stock(path):
    """Read one semicolon-separated 1-minute stock CSV and parse its 'date' column."""
    df = pandas.read_csv(path, sep=";")
    df['date'] = pandas.to_datetime(df['date'], format="%d.%m.%Y %H:%M")
    return df

# Load every ticker once, via a loop instead of four copy-pasted stanzas.
initial = {}
for _sym, _path in [
    ('GOOG', 'GOOG_stock_sample-2/GOOG_1min_sample_cordates.csv'),
    ('FDX', 'FDX_stock_sample/FDX_1min_sample_cordates.csv'),
    ('GS', 'GS_stock_sample/GS_1min_sample_cordates.csv'),
    ('KO', 'KO_stock_sample/KO_1min_sample_cordates.csv'),
]:
    _df = _load_stock(_path)
    _df.name = _sym  # informal label used by later sections
    initial[_sym] = _df

# Per-ticker module-level aliases kept for backward compatibility.
GOOG = initial['GOOG']
FDX = initial['FDX']
GS = initial['GS']
KO = initial['KO']
ik = list(initial.keys())

# Slice each ticker into four consecutive 392-row (1960/5) windows,
# labelled as trading days Apr 4 .. Apr 7.
initial_cut = {}
for t in ik:
    for k in range(4, 8):
        keydate = t + " Stock" + " Apr " + str(k)
        initial_cut[keydate] = initial[t].iloc[int(1960/5*(k-4)):int(1960/5*(k-3))]
ik_cut = list(initial_cut.keys())
# ----- OHLC plots -----
import plotly.graph_objects as go
import plotly.express as px
def ohlcplot(dataframe, stockname):
    """Render an OHLC candlestick chart for one stock, hiding after-hours gaps."""
    chart = go.Figure(
        data=go.Ohlc(
            x=dataframe['date'],
            open=dataframe['open'],
            high=dataframe['high'],
            low=dataframe['low'],
            close=dataframe['close'],
        )
    )
    # Drop the range-slider strip beneath the x-axis.
    chart.update(layout_xaxis_rangeslider_visible=False)
    #fig.update_layout(shapes = [dict(x0='2022-04-04 16:00:00', x1='2022-04-04 16:00:00', y0=0, y1=1, xref='x', yref='paper',line_width=2, line_color = "#191970", opacity=0.4)])
    # Rangebreak bounds are [from, to] pairs (not single values): hide the
    # hours outside the 9:30am-4pm trading session. Weekend/holiday breaks
    # could be added the same way (dict(bounds=["sat", "mon"]), dict(values=[...])).
    chart.update_xaxes(
        rangebreaks=[dict(bounds=[16, 9.5], pattern="hour")]
    )
    chart.update_layout(
        title='OHLC plot',
        yaxis_title=stockname + " Stock",
        plot_bgcolor="#E5E4E2",
        title_font_color="black",
    )
    chart.update_yaxes(color="black")
    chart.update_xaxes(color="black")
    chart.show()
# Draw one OHLC chart per ticker.
for ticker in ik:
    ohlcplot(initial[ticker], ticker)
# ----- Pre-work -----
# Fix the RNG for reproducibility and prepare scaled close-price series.
numpy.random.seed(7)
scaler = MinMaxScaler(feature_range=(0, 1))
initial_close_cut = {}
dataset_close_cut = {}
for window_key in ik_cut:
    # Column 4 is 'close'; 392 rows = one trading day.
    closes = pandas.DataFrame(initial_cut[window_key].iloc[0:392, 4])
    initial_close_cut[window_key] = closes
    # NOTE: scaler is refit per window, so it ends up fitted to the last one.
    dataset_close_cut[window_key] = scaler.fit_transform(closes[['close']].astype('float64'))
ick_cut = list(initial_close_cut.keys())
dck_cut = list(dataset_close_cut.keys())
# Silence Python warnings and TensorFlow's C++ logging noise for the
# model-training sections below.
import warnings
warnings.filterwarnings("ignore")
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # 3 = print only fatal TF messages
# ----- LSTM -----
# Per-stock score containers, filled by lstm_reg().  Variant suffixes:
#   1    = baseline LSTM (single lagged close, lookback 1)
#   WM   = window method (several lagged closes as features)
#   TS   = lagged closes treated as time steps
#   MBB  = stateful LSTM with memory between batches
#   SMBB = stacked stateful LSTM with memory between batches
lstm_1_rmse_train = {}
lstm_1_rmse_test = {}
lstm_1_mape_train = {}
lstm_1_mape_test = {}
lstm_WM_rmse_train = {}
lstm_WM_rmse_test = {}
lstm_WM_mape_train = {}
lstm_WM_mape_test = {}
lstm_TS_rmse_train = {}
lstm_TS_rmse_test = {}
lstm_TS_mape_train = {}
lstm_TS_mape_test = {}
lstm_MBB_rmse_train = {}
lstm_MBB_rmse_test = {}
lstm_MBB_mape_train = {}
lstm_MBB_mape_test = {}
lstm_SMBB_rmse_train = {}
lstm_SMBB_rmse_test = {}
lstm_SMBB_mape_train = {}
lstm_SMBB_mape_test = {}
def create_dataset(dataset, look_back=1):
    """Convert a (n, 1) series into supervised pairs.

    Each X row is a window of `look_back` consecutive values; the matching
    y is the value immediately after that window.

    Note: the upper bound (n - look_back - 1) skips the final usable pair;
    kept as-is because the downstream plot offsets depend on it.
    """
    n_pairs = len(dataset) - look_back - 1
    windows = [dataset[i:i + look_back, 0] for i in range(n_pairs)]
    targets = [dataset[i + look_back, 0] for i in range(n_pairs)]
    return numpy.array(windows), numpy.array(targets)
def lstm_reg(dataset, stockname, lookback = 1, plotname = "", isstacked = ""):
    """Train one LSTM variant on a scaled (n, 1) series and plot its fit.

    The variant is selected by `plotname`/`isstacked`:
      - ""                              : baseline, lookback as features
      - " with Window Method"           : same shape, larger lookback
      - " with Time Steps"              : lookback as time steps
      - " with Memory Between Batches"  : stateful LSTM (isstacked == "")
        or stacked stateful LSTM        : (isstacked == "Stacked ")

    Side effects: records train/test RMSE and MAPE into the matching
    module-level lstm_* dicts (keyed by `stockname`), prints the scores,
    and shows a plotly figure.  Relies on the module-level `scaler`
    having been fit on this same `dataset` by the caller.
    """
    # split into train and test sets (60/40, chronological)
    train_size = int(len(dataset) * 0.6)
    test_size = len(dataset) - train_size
    train, test = dataset[0:train_size,:], dataset[train_size:len(dataset),:]
    #print(dataset)
    #print(scaler.inverse_transform(train))
    #print(scaler.inverse_transform(test))
    #print(scaler.inverse_transform(dataset))
    # reshape into X=t and Y=t+1
    look_back = lookback
    trainX, trainY = create_dataset(train, look_back)
    testX, testY = create_dataset(test, look_back)
    #print(trainX, trainY)
    #print(testX, testY)
    if plotname == "" or plotname == " with Window Method":
        # reshape input to be [samples, time steps, features]
        # (lookback values are treated as features of a single time step)
        trainX = numpy.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
        testX = numpy.reshape(testX, (testX.shape[0], 1, testX.shape[1]))
        # create and fit the LSTM network
        model = Sequential()
        model.add(LSTM(4, input_shape=(1, look_back)))
        model.add(Dense(1))
        model.compile(loss='mean_squared_error', optimizer='adam')
        model.fit(trainX, trainY, epochs=100, batch_size=1, verbose=0)
        # make predictions
        #trainPredict = model.predict(trainX)
        #testPredict = model.predict(testX)
        # invert predictions back to the original price scale
        trainPredict = scaler.inverse_transform(model.predict(trainX))
        trainY = scaler.inverse_transform([trainY])
        testPredict = scaler.inverse_transform(model.predict(testX))
        testY = scaler.inverse_transform([testY])
        trainScore = math.sqrt(mean_squared_error(trainY[0], trainPredict[:,0]))
        testScore = math.sqrt(mean_squared_error(testY[0], testPredict[:,0]))
        trainScore_mape = mean_absolute_percentage_error(trainY[0], trainPredict[:,0])*100
        testScore_mape = mean_absolute_percentage_error(testY[0], testPredict[:,0])*100
        if plotname == "":
            # baseline variant
            lstm_1_rmse_train[stockname] = trainScore
            lstm_1_rmse_test[stockname] = testScore
            lstm_1_mape_train[stockname] = trainScore_mape
            lstm_1_mape_test[stockname] = testScore_mape
        else:
            # window-method variant
            lstm_WM_rmse_train[stockname] = trainScore
            lstm_WM_rmse_test[stockname] = testScore
            lstm_WM_mape_train[stockname] = trainScore_mape
            lstm_WM_mape_test[stockname] = testScore_mape
    else:
        # reshape input to be [samples, time steps, features]
        # (lookback values are treated as separate time steps of one feature)
        trainX = numpy.reshape(trainX, (trainX.shape[0], trainX.shape[1], 1))
        testX = numpy.reshape(testX, (testX.shape[0], testX.shape[1], 1))
        if plotname == " with Time Steps":
            # create and fit the LSTM network
            model = Sequential()
            model.add(LSTM(4, input_shape=(look_back, 1)))
            model.add(Dense(1))
            model.compile(loss='mean_squared_error', optimizer='adam')
            model.fit(trainX, trainY, epochs=100, batch_size=1, verbose=0)
            # make predictions (inverted back to price scale)
            trainPredict = scaler.inverse_transform(model.predict(trainX))
            trainY = scaler.inverse_transform([trainY])
            testPredict = scaler.inverse_transform(model.predict(testX))
            testY = scaler.inverse_transform([testY])
            trainScore = math.sqrt(mean_squared_error(trainY[0], trainPredict[:,0]))
            testScore = math.sqrt(mean_squared_error(testY[0], testPredict[:,0]))
            trainScore_mape = mean_absolute_percentage_error(trainY[0], trainPredict[:,0])*100
            testScore_mape = mean_absolute_percentage_error(testY[0], testPredict[:,0])*100
            lstm_TS_rmse_train[stockname] = trainScore
            lstm_TS_rmse_test[stockname] = testScore
            lstm_TS_mape_train[stockname] = trainScore_mape
            lstm_TS_mape_test[stockname] = testScore_mape
        else:
            if isstacked == "":
                # create and fit a stateful LSTM: the internal state is kept
                # across batches and reset manually once per epoch
                batch_size = 1
                model = Sequential()
                model.add(LSTM(4, batch_input_shape=(batch_size, look_back, 1), stateful=True))
                model.add(Dense(1))
                model.compile(loss='mean_squared_error', optimizer='adam')
                for i in range(100):
                    # shuffle=False preserves the sequence order the state depends on
                    model.fit(trainX, trainY, epochs=1, batch_size=batch_size, verbose=0, shuffle=False)
                    model.reset_states()
                # make predictions (state reset between train and test passes)
                trainPredict = scaler.inverse_transform(model.predict(trainX, batch_size=batch_size))
                trainY = scaler.inverse_transform([trainY])
                model.reset_states()
                testPredict = scaler.inverse_transform(model.predict(testX, batch_size=batch_size))
                testY = scaler.inverse_transform([testY])
                trainScore = math.sqrt(mean_squared_error(trainY[0], trainPredict[:,0]))
                testScore = math.sqrt(mean_squared_error(testY[0], testPredict[:,0]))
                trainScore_mape = mean_absolute_percentage_error(trainY[0], trainPredict[:,0])*100
                testScore_mape = mean_absolute_percentage_error(testY[0], testPredict[:,0])*100
                lstm_MBB_rmse_train[stockname] = trainScore
                lstm_MBB_rmse_test[stockname] = testScore
                lstm_MBB_mape_train[stockname] = trainScore_mape
                lstm_MBB_mape_test[stockname] = testScore_mape
            else:
                # create and fit two stacked stateful LSTM layers
                # (first layer returns sequences so the second can consume them)
                batch_size = 1
                model = Sequential()
                model.add(LSTM(4, batch_input_shape=(batch_size, look_back, 1), stateful=True, return_sequences=True))
                model.add(LSTM(4, batch_input_shape=(batch_size, look_back, 1), stateful=True))
                model.add(Dense(1))
                model.compile(loss='mean_squared_error', optimizer='adam')
                for i in range(100):
                    model.fit(trainX, trainY, epochs=1, batch_size=batch_size, verbose=0, shuffle=False)
                    model.reset_states()
                # make predictions (state reset between train and test passes)
                trainPredict = scaler.inverse_transform(model.predict(trainX, batch_size=batch_size))
                trainY = scaler.inverse_transform([trainY])
                model.reset_states()
                testPredict = scaler.inverse_transform(model.predict(testX, batch_size=batch_size))
                testY = scaler.inverse_transform([testY])
                trainScore = math.sqrt(mean_squared_error(trainY[0], trainPredict[:,0]))
                testScore = math.sqrt(mean_squared_error(testY[0], testPredict[:,0]))
                trainScore_mape = mean_absolute_percentage_error(trainY[0], trainPredict[:,0])*100
                testScore_mape = mean_absolute_percentage_error(testY[0], testPredict[:,0])*100
                lstm_SMBB_rmse_train[stockname] = trainScore
                lstm_SMBB_rmse_test[stockname] = testScore
                lstm_SMBB_mape_train[stockname] = trainScore_mape
                lstm_SMBB_mape_test[stockname] = testScore_mape
    # calculate root mean squared error
    print('Train Score: %.3f RMSE' % (trainScore))
    print('Test Score: %.3f RMSE' % (testScore))
    print('Train Score: %.3f MAPE' % (trainScore_mape))
    print('Test Score: %.3f MAPE' % (testScore_mape))
    # shift train predictions for plotting (offset by look_back)
    trainPredictPlot = numpy.empty_like(dataset)
    trainPredictPlot[:, :] = numpy.nan
    trainPredictPlot[look_back:len(trainPredict)+look_back, :] = trainPredict
    # shift test predictions for plotting
    testPredictPlot = numpy.empty_like(dataset)
    testPredictPlot[:, :] = numpy.nan
    testPredictPlot[len(trainPredict)+(look_back*2)+1:len(dataset)-1, :] = testPredict
    # plot baseline and predictions
    #plt.plot(scaler.inverse_transform(dataset), color = "black")
    #plt.plot(trainPredictPlot)
    #plt.plot(testPredictPlot)
    #plt.show()
    fig = px.line(scaler.inverse_transform(dataset), color_discrete_sequence=["black"])
    fig.add_traces(
        list(px.line(trainPredictPlot, color_discrete_sequence=["#409a73"]).select_traces())
    )
    fig.add_traces(
        list(px.line(testPredictPlot, color_discrete_sequence=["#f34b3a"]).select_traces())
    )
    fig.update_layout(
        title= isstacked + 'LSTM model prediction' + plotname,
        yaxis_title= stockname,
        xaxis_title = "N. of observation"
    )
    fig.update_layout(plot_bgcolor="#E5E4E2")
    fig.update_layout(showlegend=False)
    fig.update_yaxes(color="black")
    fig.update_xaxes(color="black")
    fig.update_layout(title_font_color="black")
    fig.show()
# Baseline LSTM (lookback = 1) on every one-day close-price window.
for window_key in ick_cut:
    lstm_reg(scaler.fit_transform(initial_close_cut[window_key][['close']].astype('float64')), window_key)
Metal device set to: Apple M1 Train Score: 1.974 RMSE Test Score: 1.179 RMSE Train Score: 0.050 MAPE Test Score: 0.032 MAPE
Train Score: 1.982 RMSE Test Score: 1.414 RMSE Train Score: 0.052 MAPE Test Score: 0.038 MAPE
Train Score: 2.113 RMSE Test Score: 2.982 RMSE Train Score: 0.056 MAPE Test Score: 0.077 MAPE
Train Score: 2.889 RMSE Test Score: 2.035 RMSE Train Score: 0.076 MAPE Test Score: 0.056 MAPE
Train Score: 0.230 RMSE Test Score: 0.089 RMSE Train Score: 0.069 MAPE Test Score: 0.031 MAPE
Train Score: 0.233 RMSE Test Score: 0.297 RMSE Train Score: 0.082 MAPE Test Score: 0.111 MAPE
Train Score: 0.204 RMSE Test Score: 0.158 RMSE Train Score: 0.073 MAPE Test Score: 0.061 MAPE
Train Score: 0.197 RMSE Test Score: 0.208 RMSE Train Score: 0.068 MAPE Test Score: 0.080 MAPE
Train Score: 0.244 RMSE Test Score: 0.135 RMSE Train Score: 0.054 MAPE Test Score: 0.033 MAPE
Train Score: 0.221 RMSE Test Score: 0.139 RMSE Train Score: 0.051 MAPE Test Score: 0.033 MAPE
Train Score: 0.263 RMSE Test Score: 0.225 RMSE Train Score: 0.060 MAPE Test Score: 0.059 MAPE
Train Score: 0.235 RMSE Test Score: 0.199 RMSE Train Score: 0.058 MAPE Test Score: 0.050 MAPE
Train Score: 0.039 RMSE Test Score: 0.032 RMSE Train Score: 0.047 MAPE Test Score: 0.045 MAPE
Train Score: 0.035 RMSE Test Score: 0.045 RMSE Train Score: 0.041 MAPE Test Score: 0.055 MAPE
Train Score: 0.042 RMSE Test Score: 0.023 RMSE Train Score: 0.046 MAPE Test Score: 0.028 MAPE
Train Score: 0.034 RMSE Test Score: 0.021 RMSE Train Score: 0.041 MAPE Test Score: 0.026 MAPE
# Window-method LSTM: three lagged closes per input sample.
for window_key in ick_cut:
    lstm_reg(scaler.fit_transform(initial_close_cut[window_key][['close']].astype('float64')), window_key, 3, " with Window Method")
Train Score: 1.945 RMSE Test Score: 1.289 RMSE Train Score: 0.049 MAPE Test Score: 0.035 MAPE
Train Score: 2.057 RMSE Test Score: 1.640 RMSE Train Score: 0.053 MAPE Test Score: 0.045 MAPE
Train Score: 2.223 RMSE Test Score: 3.216 RMSE Train Score: 0.060 MAPE Test Score: 0.086 MAPE
Train Score: 2.787 RMSE Test Score: 2.291 RMSE Train Score: 0.073 MAPE Test Score: 0.066 MAPE
Train Score: 0.218 RMSE Test Score: 0.088 RMSE Train Score: 0.067 MAPE Test Score: 0.031 MAPE
Train Score: 0.215 RMSE Test Score: 0.548 RMSE Train Score: 0.075 MAPE Test Score: 0.221 MAPE
Train Score: 0.196 RMSE Test Score: 0.158 RMSE Train Score: 0.070 MAPE Test Score: 0.062 MAPE
Train Score: 0.199 RMSE Test Score: 0.239 RMSE Train Score: 0.072 MAPE Test Score: 0.094 MAPE
Train Score: 0.245 RMSE Test Score: 0.157 RMSE Train Score: 0.057 MAPE Test Score: 0.039 MAPE
Train Score: 0.221 RMSE Test Score: 0.153 RMSE Train Score: 0.051 MAPE Test Score: 0.038 MAPE
Train Score: 0.260 RMSE Test Score: 0.255 RMSE Train Score: 0.061 MAPE Test Score: 0.067 MAPE
Train Score: 0.237 RMSE Test Score: 0.204 RMSE Train Score: 0.059 MAPE Test Score: 0.051 MAPE
Train Score: 0.035 RMSE Test Score: 0.033 RMSE Train Score: 0.043 MAPE Test Score: 0.046 MAPE
Train Score: 0.038 RMSE Test Score: 0.058 RMSE Train Score: 0.047 MAPE Test Score: 0.065 MAPE
Train Score: 0.040 RMSE Test Score: 0.023 RMSE Train Score: 0.045 MAPE Test Score: 0.028 MAPE
Train Score: 0.033 RMSE Test Score: 0.025 RMSE Train Score: 0.040 MAPE Test Score: 0.031 MAPE
# Time-steps LSTM: the three lags become separate time steps.
for window_key in ick_cut:
    lstm_reg(scaler.fit_transform(initial_close_cut[window_key][['close']].astype('float64')), window_key, 3, " with Time Steps")
Train Score: 1.997 RMSE Test Score: 1.364 RMSE Train Score: 0.050 MAPE Test Score: 0.037 MAPE
Train Score: 2.058 RMSE Test Score: 1.371 RMSE Train Score: 0.054 MAPE Test Score: 0.037 MAPE
Train Score: 2.222 RMSE Test Score: 3.395 RMSE Train Score: 0.061 MAPE Test Score: 0.091 MAPE
Train Score: 2.791 RMSE Test Score: 2.010 RMSE Train Score: 0.075 MAPE Test Score: 0.056 MAPE
Train Score: 0.218 RMSE Test Score: 0.088 RMSE Train Score: 0.067 MAPE Test Score: 0.031 MAPE
Train Score: 0.221 RMSE Test Score: 0.327 RMSE Train Score: 0.079 MAPE Test Score: 0.129 MAPE
Train Score: 0.203 RMSE Test Score: 0.166 RMSE Train Score: 0.072 MAPE Test Score: 0.064 MAPE
Train Score: 0.206 RMSE Test Score: 0.136 RMSE Train Score: 0.071 MAPE Test Score: 0.054 MAPE
Train Score: 0.247 RMSE Test Score: 0.168 RMSE Train Score: 0.055 MAPE Test Score: 0.041 MAPE
Train Score: 0.221 RMSE Test Score: 0.141 RMSE Train Score: 0.051 MAPE Test Score: 0.035 MAPE
Train Score: 0.260 RMSE Test Score: 0.249 RMSE Train Score: 0.060 MAPE Test Score: 0.064 MAPE
Train Score: 0.241 RMSE Test Score: 0.229 RMSE Train Score: 0.060 MAPE Test Score: 0.058 MAPE
Train Score: 0.037 RMSE Test Score: 0.036 RMSE Train Score: 0.048 MAPE Test Score: 0.051 MAPE
Train Score: 0.036 RMSE Test Score: 0.070 RMSE Train Score: 0.043 MAPE Test Score: 0.091 MAPE
Train Score: 0.039 RMSE Test Score: 0.023 RMSE Train Score: 0.044 MAPE Test Score: 0.029 MAPE
Train Score: 0.035 RMSE Test Score: 0.028 RMSE Train Score: 0.042 MAPE Test Score: 0.036 MAPE
# Stateful LSTM with memory carried between batches.
for window_key in ick_cut:
    lstm_reg(scaler.fit_transform(initial_close_cut[window_key][['close']].astype('float64')), window_key, 3, " with Memory Between Batches")
Train Score: 2.498 RMSE Test Score: 1.992 RMSE Train Score: 0.067 MAPE Test Score: 0.053 MAPE
Train Score: 2.265 RMSE Test Score: 1.927 RMSE Train Score: 0.059 MAPE Test Score: 0.055 MAPE
Train Score: 3.055 RMSE Test Score: 3.417 RMSE Train Score: 0.088 MAPE Test Score: 0.096 MAPE
Train Score: 3.402 RMSE Test Score: 3.985 RMSE Train Score: 0.089 MAPE Test Score: 0.128 MAPE
Train Score: 0.219 RMSE Test Score: 0.093 RMSE Train Score: 0.068 MAPE Test Score: 0.032 MAPE
Train Score: 0.281 RMSE Test Score: 0.688 RMSE Train Score: 0.091 MAPE Test Score: 0.292 MAPE
Train Score: 0.285 RMSE Test Score: 0.158 RMSE Train Score: 0.108 MAPE Test Score: 0.061 MAPE
Train Score: 0.310 RMSE Test Score: 0.498 RMSE Train Score: 0.134 MAPE Test Score: 0.209 MAPE
Train Score: 0.303 RMSE Test Score: 0.223 RMSE Train Score: 0.065 MAPE Test Score: 0.057 MAPE
Train Score: 0.320 RMSE Test Score: 0.172 RMSE Train Score: 0.071 MAPE Test Score: 0.043 MAPE
Train Score: 0.262 RMSE Test Score: 0.224 RMSE Train Score: 0.062 MAPE Test Score: 0.057 MAPE
Train Score: 0.293 RMSE Test Score: 0.312 RMSE Train Score: 0.078 MAPE Test Score: 0.083 MAPE
Train Score: 0.035 RMSE Test Score: 0.061 RMSE Train Score: 0.041 MAPE Test Score: 0.062 MAPE
Train Score: 0.053 RMSE Test Score: 0.065 RMSE Train Score: 0.068 MAPE Test Score: 0.077 MAPE
Train Score: 0.050 RMSE Test Score: 0.045 RMSE Train Score: 0.058 MAPE Test Score: 0.062 MAPE
Train Score: 0.044 RMSE Test Score: 0.037 RMSE Train Score: 0.053 MAPE Test Score: 0.048 MAPE
# Stacked stateful LSTM variant (two LSTM layers, lookback 2).
for window_key in ick_cut:
    lstm_reg(scaler.fit_transform(initial_close_cut[window_key][['close']].astype('float64')), window_key, 2, " with Memory Between Batches", "Stacked ")
Train Score: 2.610 RMSE Test Score: 2.916 RMSE Train Score: 0.070 MAPE Test Score: 0.064 MAPE
Train Score: 2.494 RMSE Test Score: 2.015 RMSE Train Score: 0.066 MAPE Test Score: 0.057 MAPE
Train Score: 4.110 RMSE Test Score: 4.117 RMSE Train Score: 0.123 MAPE Test Score: 0.123 MAPE
Train Score: 3.494 RMSE Test Score: 4.550 RMSE Train Score: 0.095 MAPE Test Score: 0.147 MAPE
Train Score: 0.219 RMSE Test Score: 0.093 RMSE Train Score: 0.069 MAPE Test Score: 0.032 MAPE
Train Score: 0.248 RMSE Test Score: 1.050 RMSE Train Score: 0.086 MAPE Test Score: 0.441 MAPE
Train Score: 0.359 RMSE Test Score: 0.193 RMSE Train Score: 0.145 MAPE Test Score: 0.075 MAPE
Train Score: 0.281 RMSE Test Score: 0.795 RMSE Train Score: 0.114 MAPE Test Score: 0.333 MAPE
Train Score: 0.244 RMSE Test Score: 0.200 RMSE Train Score: 0.055 MAPE Test Score: 0.049 MAPE
Train Score: 0.333 RMSE Test Score: 0.279 RMSE Train Score: 0.078 MAPE Test Score: 0.072 MAPE
Train Score: 0.264 RMSE Test Score: 0.240 RMSE Train Score: 0.062 MAPE Test Score: 0.061 MAPE
Train Score: 0.253 RMSE Test Score: 0.658 RMSE Train Score: 0.064 MAPE Test Score: 0.156 MAPE
Train Score: 0.035 RMSE Test Score: 0.118 RMSE Train Score: 0.043 MAPE Test Score: 0.176 MAPE
Train Score: 0.045 RMSE Test Score: 0.092 RMSE Train Score: 0.057 MAPE Test Score: 0.097 MAPE
Train Score: 0.058 RMSE Test Score: 0.067 RMSE Train Score: 0.070 MAPE Test Score: 0.092 MAPE
Train Score: 0.042 RMSE Test Score: 0.075 RMSE Train Score: 0.051 MAPE Test Score: 0.102 MAPE
# ----- ARIMA -----
def _load_stock_arima(path):
    """Read a 1-min stock CSV with the parsed timestamp column as index.

    `pandas.read_csv(..., squeeze=True)` was removed in pandas 2.0, so the
    squeeze is applied afterwards via DataFrame.squeeze (a no-op when more
    than one value column remains, matching the old behaviour).
    """
    frame = pandas.read_csv(path, sep=";", header=0, parse_dates=[0], index_col=0)
    return frame.squeeze("columns")

# Re-read every ticker for the ARIMA section, DRY'd into a loop.
initial_arima = {}
for _sym, _path in [
    ('GOOG', 'GOOG_stock_sample-2/GOOG_1min_sample_cordates.csv'),
    ('FDX', 'FDX_stock_sample/FDX_1min_sample_cordates.csv'),
    ('GS', 'GS_stock_sample/GS_1min_sample_cordates.csv'),
    ('KO', 'KO_stock_sample/KO_1min_sample_cordates.csv'),
]:
    _data = _load_stock_arima(_path)
    _data.name = _sym
    initial_arima[_sym] = _data

# Module-level aliases kept for backward compatibility.
GOOG_arima = initial_arima['GOOG']
FDX_arima = initial_arima['FDX']
GS_arima = initial_arima['GS']
KO_arima = initial_arima['KO']
iarimak = list(initial_arima.keys())

# Same four 392-row one-day windows as the LSTM section.
initial_arima_cut = {}
for t in iarimak:
    for k in range(4, 8):
        keydate = t + " Stock" + " Apr " + str(k)
        initial_arima_cut[keydate] = initial_arima[t].iloc[int(1960/5*(k-4)):int(1960/5*(k-3))]
iarimak_cut = list(initial_arima_cut.keys())

# Per-stock ARIMA score containers, filled by arima_reg().
arima_rmse_train = {}
arima_rmse_test = {}
arima_mape_train = {}
arima_mape_test = {}
from statsmodels.tsa.arima.model import ARIMA
from math import sqrt
def _walk_forward_forecasts(history, observations):
    """One-step-ahead ARIMA(1,0,1) walk-forward forecasts over `observations`.

    For each observation: refit on `history`, forecast one step, then append
    the true observation to `history`.  Returns the list of scalar forecasts.
    """
    forecasts = []
    for obs in observations:
        fitted = ARIMA(history, order=(1, 0, 1)).fit()
        forecasts.append(fitted.forecast()[0])
        history.append(obs)
    return forecasts

def arima_reg(dataframe, stockname, plotname = "", isstacked = ""):
    """Walk-forward ARIMA(1,0,1) fit on one day of close prices.

    Stores train/test RMSE and MAPE in the module-level arima_* dicts
    (keyed by `stockname`), prints the scores, and shows a plotly figure.
    """
    # Close column (position 3 once the date column is the index), first 392 rows.
    dataframe = pandas.DataFrame(dataframe.iloc[0:392, 3])
    # NOTE(review): a monthly PeriodIndex on 1-minute data collapses all
    # timestamps into the same period; harmless here since ARIMA is fit on
    # the raw values, but worth confirming this was intentional.
    dataframe.index = dataframe.index.to_period('M')
    # split into train and test sets (60/40, chronological)
    X = dataframe.values
    size = int(len(X) * 0.6)
    train, test = X[0:size], X[size:len(X)]
    # walk-forward validation over the held-out tail
    predictions = _walk_forward_forecasts([x for x in train], test)
    # "Train" forecasts: the history already contains the full training
    # series before the first forecast (original behaviour preserved).
    predictions_train = _walk_forward_forecasts(list(train), train)
    # Evaluate forecasts; sizes derived from the data instead of the old
    # hard-coded 235/157 so any window length works.  The first train
    # prediction is dropped, as before.
    predictions_train_col = numpy.reshape(predictions_train, (len(predictions_train), 1))
    train_rmse = sqrt(mean_squared_error(train[1:len(train)], predictions_train_col[1:len(train)]))
    test_rmse = sqrt(mean_squared_error(test, predictions))
    trainScore_mape = mean_absolute_percentage_error(train[1:len(train)], predictions_train_col[1:len(train)])*100
    testScore_mape = mean_absolute_percentage_error(test, predictions)*100
    arima_rmse_train[stockname] = train_rmse
    arima_rmse_test[stockname] = test_rmse
    arima_mape_train[stockname] = trainScore_mape
    arima_mape_test[stockname] = testScore_mape
    print('Train Score: %.3f RMSE' % (train_rmse))
    print('Test Score: %.3f RMSE' % (test_rmse))
    print('Train Score: %.3f MAPE' % (trainScore_mape))
    print('Test Score: %.3f MAPE' % (testScore_mape))
    # plot forecasts against actual outcomes (NaN-padded to full length)
    testPredictPlot = numpy.empty_like(X)
    testPredictPlot[:, :] = numpy.nan
    testPredictPlot[len(train):len(X), :] = numpy.reshape(predictions, (len(predictions), 1))
    trainPredictPlot = numpy.empty_like(X)
    trainPredictPlot[:, :] = numpy.nan
    trainPredictPlot[0:len(predictions_train), :] = predictions_train_col
    fig = px.line(X[1:len(X)], color_discrete_sequence=["black"])
    fig.add_traces(
        list(px.line(trainPredictPlot[1:len(trainPredictPlot)], color_discrete_sequence=["#409a73"]).select_traces())
    )
    fig.add_traces(
        list(px.line(testPredictPlot[1:len(testPredictPlot)], color_discrete_sequence=["#f34b3a"]).select_traces())
    )
    fig.update_layout(
        title= isstacked + 'ARIMA model prediction' + plotname,
        yaxis_title= stockname,
        xaxis_title = "N. of observation"
    )
    fig.update_layout(plot_bgcolor="#E5E4E2")
    fig.update_layout(showlegend=False)
    fig.show()
# Walk-forward ARIMA on every one-day window.
for window_key in iarimak_cut:
    arima_reg(initial_arima_cut[window_key], window_key)
Train Score: 2.144 RMSE Test Score: 1.164 RMSE Train Score: 0.051 MAPE Test Score: 0.030 MAPE
Train Score: 2.015 RMSE Test Score: 1.302 RMSE Train Score: 0.052 MAPE Test Score: 0.033 MAPE
Train Score: 2.152 RMSE Test Score: 2.985 RMSE Train Score: 0.057 MAPE Test Score: 0.076 MAPE
Train Score: 2.858 RMSE Test Score: 1.993 RMSE Train Score: 0.073 MAPE Test Score: 0.053 MAPE
/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals /Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals /Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals /Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
Train Score: 0.236 RMSE Test Score: 0.084 RMSE Train Score: 0.069 MAPE Test Score: 0.029 MAPE
/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals /Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
Train Score: 0.214 RMSE Test Score: 0.124 RMSE Train Score: 0.075 MAPE Test Score: 0.046 MAPE
/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals /Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals /Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals /Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
Train Score: 0.200 RMSE Test Score: 0.166 RMSE Train Score: 0.071 MAPE Test Score: 0.064 MAPE
/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
Train Score: 0.202 RMSE Test Score: 0.136 RMSE Train Score: 0.069 MAPE Test Score: 0.051 MAPE
/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals /Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals /Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
Train Score: 0.247 RMSE Test Score: 0.134 RMSE Train Score: 0.055 MAPE Test Score: 0.032 MAPE
Train Score: 0.221 RMSE Test Score: 0.136 RMSE Train Score: 0.051 MAPE Test Score: 0.033 MAPE
/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
Train Score: 0.270 RMSE Test Score: 0.219 RMSE Train Score: 0.060 MAPE Test Score: 0.055 MAPE
/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals /Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals /Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
Train Score: 0.238 RMSE Test Score: 0.186 RMSE Train Score: 0.059 MAPE Test Score: 0.046 MAPE
/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals /Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals /Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals /Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals /Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals /Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals /Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals /Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals /Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals /Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals /Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. 
Check mle_retvals /Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
Train Score: 0.036 RMSE Test Score: 0.018 RMSE Train Score: 0.041 MAPE Test Score: 0.021 MAPE
/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals /Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals /Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals /Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals /Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals /Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals /Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals /Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals /Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals /Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals /Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. 
Check mle_retvals /Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals /Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals /Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
Train Score: 0.036 RMSE Test Score: 0.025 RMSE Train Score: 0.041 MAPE Test Score: 0.029 MAPE
/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals /Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals /Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals /Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals /Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals /Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals /Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals /Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals /Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals /Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals /Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. 
Check mle_retvals /Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
Train Score: 0.042 RMSE Test Score: 0.022 RMSE Train Score: 0.047 MAPE Test Score: 0.028 MAPE
/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals /Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals /Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals /Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals /Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals /Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
Train Score: 0.034 RMSE Test Score: 0.021 RMSE Train Score: 0.041 MAPE Test Score: 0.025 MAPE
# ----- CNN -----
# Date-indexed close-price frames (one trading day each) for the CNN section.
initial_close_cut_xgboost = {}
for window_key in ik_cut:
    frame = pandas.DataFrame(initial_cut[window_key].set_index('date').iloc[0:392, 3])
    frame.name = str(window_key)
    initial_close_cut_xgboost[window_key] = frame
iccutxgbk = list(initial_close_cut_xgboost.keys())
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense,RepeatVector
from keras.layers import Flatten
from keras.layers import TimeDistributed
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
# Per-stock CNN scores, filled in by cnn_reg() below (keyed by stock/day label).
cnn_rmse_train = {}
cnn_rmse_test = {}
cnn_mape_train = {}
cnn_mape_test = {}
# NOTE(review): this module-level scaler is shadowed by the local scaler
# created inside cnn_reg(); it appears to be unused after that.
scaler = MinMaxScaler(feature_range=(-1,1))
def cnn_reg(dataset, stockname):
    """Fit a 1-D CNN one-step-ahead forecaster on a univariate price series.

    dataset   : single-column DataFrame of close prices
    stockname : key under which RMSE/MAPE scores are stored in the
                module-level cnn_* dictionaries

    Prints train/test RMSE and MAPE and renders a plotly prediction plot.
    """
    df = dataset
    values = df.values.astype('float64')
    numpy.random.seed(7)
    # Fresh scaler per series so inverse_transform matches this fit exactly.
    scaler = MinMaxScaler(feature_range=(-1, 1))
    ts = scaler.fit_transform(values)

    # Sliding windows: X holds `timestep` lagged values, Y the next value.
    timestep = 10
    X, Y = [], []
    raw_data = ts
    for i in range(len(raw_data) - timestep):
        X.append(raw_data[i:i + timestep])
        Y.append(raw_data[i + timestep])
    X = np.asanyarray(X)
    Y = np.asanyarray(Y)

    # Fixed chronological split: first k windows train, remainder test.
    k = 234
    Xtrain = X[:k, :, :]
    Ytrain = Y[:k]

    numpy.random.seed(7)
    model = Sequential()
    # input_shape derived from `timestep` instead of a duplicated literal 10.
    model.add(Conv1D(filters=128, kernel_size=2, activation='relu', input_shape=(timestep, 1)))
    model.add(Conv1D(filters=128, kernel_size=2, activation='relu'))
    model.add(Conv1D(filters=128, kernel_size=2, activation='relu'))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    model.add(Dense(10, activation='relu'))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mse')
    # fit model
    model.fit(Xtrain, Ytrain, epochs=100, verbose=0)

    Xtest = X[k:, :, :]
    Ytest = Y[k:]
    preds = model.predict(Xtest)
    preds = scaler.inverse_transform(preds)
    Ytest = np.asanyarray(Ytest).reshape(-1, 1)
    Ytest = scaler.inverse_transform(Ytest)
    Ytrain = np.asanyarray(Ytrain).reshape(-1, 1)
    Ytrain = scaler.inverse_transform(Ytrain)
    preds_train = model.predict(Xtrain)
    preds_train = scaler.inverse_transform(preds_train)

    trainScore = sqrt(mean_squared_error(Ytrain, preds_train))
    testScore = sqrt(mean_squared_error(Ytest, preds))
    trainScore_mape = mean_absolute_percentage_error(Ytrain, preds_train) * 100
    testScore_mape = mean_absolute_percentage_error(Ytest, preds) * 100
    cnn_rmse_train[stockname] = trainScore
    cnn_rmse_test[stockname] = testScore
    cnn_mape_train[stockname] = trainScore_mape
    cnn_mape_test[stockname] = testScore_mape

    # Align the test segment with its position in the full series for plotting;
    # length derived from the data instead of the hard-coded 148.
    n_test = len(raw_data) - timestep - len(Ytrain)
    testPredictPlot = numpy.empty_like(raw_data)
    testPredictPlot[:, :] = numpy.nan
    testPredictPlot[len(Ytrain):len(raw_data) - timestep, :] = numpy.reshape(preds, (n_test, 1))
    testPlot = numpy.empty_like(raw_data)
    testPlot[:, :] = numpy.nan
    testPlot[len(Ytrain):len(raw_data) - timestep, :] = numpy.reshape(Ytest, (n_test, 1))

    print('Train Score: %.3f RMSE' % trainScore)
    print('Test Score: %.3f RMSE' % testScore)
    print('Train Score: %.3f MAPE' % (trainScore_mape))
    print('Test Score: %.3f MAPE' % (testScore_mape))
    fig = px.line(Ytrain, color_discrete_sequence=["black"])
    fig.add_traces(list(px.line(testPlot, color_discrete_sequence=["black"]).select_traces()))
    fig.add_traces(list(px.line(preds_train, color_discrete_sequence=["#409a73"]).select_traces()))
    fig.add_traces(list(px.line(testPredictPlot, color_discrete_sequence=["#f34b3a"]).select_traces()))
    fig.update_layout(title= 'CNN model prediction',yaxis_title= stockname, xaxis_title = "N. of observation")
    fig.update_layout(plot_bgcolor="#E5E4E2")
    fig.update_layout(showlegend=False)
    fig.show()
# Train/evaluate the CNN model on every stock/day close-price slice.
for t in iccutxgbk:
    cnn_reg(initial_close_cut_xgboost[t], t)
Train Score: 2.098 RMSE Test Score: 2.478 RMSE Train Score: 0.058 MAPE Test Score: 0.067 MAPE
Train Score: 1.468 RMSE Test Score: 2.194 RMSE Train Score: 0.038 MAPE Test Score: 0.062 MAPE
Train Score: 1.373 RMSE Test Score: 5.085 RMSE Train Score: 0.037 MAPE Test Score: 0.144 MAPE
Train Score: 1.557 RMSE Test Score: 5.133 RMSE Train Score: 0.041 MAPE Test Score: 0.156 MAPE
Train Score: 0.084 RMSE Test Score: 0.137 RMSE Train Score: 0.030 MAPE Test Score: 0.048 MAPE
Train Score: 0.150 RMSE Test Score: 0.270 RMSE Train Score: 0.054 MAPE Test Score: 0.111 MAPE
Train Score: 0.126 RMSE Test Score: 0.298 RMSE Train Score: 0.048 MAPE Test Score: 0.115 MAPE
Train Score: 0.163 RMSE Test Score: 1.086 RMSE Train Score: 0.060 MAPE Test Score: 0.468 MAPE
Train Score: 0.160 RMSE Test Score: 0.311 RMSE Train Score: 0.035 MAPE Test Score: 0.075 MAPE
Train Score: 0.156 RMSE Test Score: 0.251 RMSE Train Score: 0.037 MAPE Test Score: 0.061 MAPE
Train Score: 0.069 RMSE Test Score: 0.472 RMSE Train Score: 0.015 MAPE Test Score: 0.118 MAPE
Train Score: 0.212 RMSE Test Score: 0.709 RMSE Train Score: 0.054 MAPE Test Score: 0.176 MAPE
Train Score: 0.023 RMSE Test Score: 0.030 RMSE Train Score: 0.026 MAPE Test Score: 0.038 MAPE
Train Score: 0.023 RMSE Test Score: 0.131 RMSE Train Score: 0.027 MAPE Test Score: 0.154 MAPE
Train Score: 0.024 RMSE Test Score: 0.040 RMSE Train Score: 0.028 MAPE Test Score: 0.050 MAPE
Train Score: 0.024 RMSE Test Score: 0.078 RMSE Train Score: 0.028 MAPE Test Score: 0.107 MAPE
XGBoost
from numpy import loadtxt
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from math import sqrt
from numpy import asarray
from pandas import read_csv
from pandas import DataFrame
from pandas import concat
from sklearn.metrics import mean_absolute_error
from xgboost import XGBRegressor
from matplotlib import pyplot
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder
# Per-stock XGBoost scores, filled in by walk_forward_validation() below.
xgb_rmse_train = {}
xgb_rmse_test = {}
xgb_mape_train = {}
xgb_mape_test = {}
# transform a time series dataset into a supervised learning dataset
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Frame a (multi)variate series as a supervised-learning array.

    data    : list or 2-D array of observations, one row per time step
    n_in    : number of lag columns (t-n_in .. t-1) used as input
    n_out   : number of lead columns (t .. t+n_out-1) used as output
    dropnan : drop rows made incomplete by the shifting

    Returns a 2-D numpy array: lag columns first, then lead columns.
    """
    df = DataFrame(data)
    cols = list()
    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
    # forecast sequence (t, t+1, ... t+n)
    for i in range(0, n_out):
        cols.append(df.shift(-i))
    # put it all together
    agg = concat(cols, axis=1)
    # drop rows with NaN values introduced by the shifts
    if dropnan:
        agg.dropna(inplace=True)
    return agg.values
# split a univariate dataset into train/test sets
def train_test_split(data, n_test):
    """Return (train, test): test is the trailing n_test rows of *data*."""
    cut = -n_test
    return data[:cut, :], data[cut:, :]
# fit an xgboost model and make a one step prediction
def xgboost_forecast(train, testX):
    """Fit an XGBoost regressor on *train* rows and predict one step for *testX*.

    Each row of *train* is [features..., target]; returns a scalar forecast.
    """
    history = asarray(train)
    features, target = history[:, :-1], history[:, -1]
    regressor = XGBRegressor(objective='reg:squarederror', n_estimators=30)
    regressor.fit(features, target)
    # single-row prediction for the held-out feature vector
    return regressor.predict(asarray([testX]))[0]
# walk-forward validation for univariate data
def walk_forward_validation(data, n_test, myset, stockname):
    """Walk-forward one-step XGBoost validation on a supervised dataset.

    data      : 2-D array from series_to_supervised (last column = target)
    n_test    : number of trailing rows held out for testing
    myset     : original series values (2-D), used only for plotting
    stockname : key under which scores are stored in the xgb_* dictionaries
    """
    predictions = list()
    # split dataset
    train, test = train_test_split(data, n_test)
    # seed history with training dataset
    history = [x for x in train]
    # step over each time-step in the test set
    for i in range(len(test)):
        # split test row into input and output columns
        testX, testy = test[i, :-1], test[i, -1]
        # fit model on history and make a prediction
        yhat = xgboost_forecast(history, testX)
        # store forecast in list of predictions
        predictions.append(yhat)
        # BUG FIX: append the actual observation so the next step refits on it —
        # the comment was here but the statement was missing (cf. the RF version).
        history.append(test[i])
    # in-sample predictions over an expanding training history
    predictions_train = list()
    history_train = list(train)
    for i in range(len(train)):
        trainX, trainy = train[i, :-1], train[i, -1]
        # fit model on history and make a prediction
        yhat_train = xgboost_forecast(history_train, trainX)
        predictions_train.append(yhat_train)
        # add actual observation to history for the next loop
        history_train.append(train[i])
    # first in-sample prediction is skipped (made before any history update)
    trainScore = sqrt(mean_squared_error(train[1:len(train), -1], numpy.array(predictions_train[1:len(predictions_train)])))
    testScore = sqrt(mean_squared_error(test[:, -1], predictions))
    trainScore_mape = mean_absolute_percentage_error(train[1:len(train), -1], numpy.array(predictions_train[1:len(predictions_train)]))*100
    testScore_mape = mean_absolute_percentage_error(test[:, -1], predictions)*100
    xgb_rmse_train[stockname] = trainScore
    xgb_rmse_test[stockname] = testScore
    xgb_mape_train[stockname] = trainScore_mape
    xgb_mape_test[stockname] = testScore_mape
    # Plot arrays aligned to the raw series; offsets derived from the data
    # instead of the hard-coded 157/225/10.
    n_lag = len(myset) - len(data)      # rows consumed by the lag window
    test_start = len(train) + n_lag
    testPredictPlot = numpy.empty_like(myset)
    testPredictPlot[:, :] = numpy.nan
    testPredictPlot[test_start:len(myset), :] = numpy.reshape(predictions, (len(predictions), 1))
    trainPredictPlot = numpy.empty_like(myset)
    trainPredictPlot[:, :] = numpy.nan
    trainPredictPlot[0:len(predictions_train), :] = numpy.reshape(predictions_train, (len(predictions_train), 1))
    PredictPlot = numpy.empty_like(myset)
    PredictPlot[:, :] = numpy.nan
    PredictPlot[len(train):len(myset) - n_lag, :] = numpy.reshape(myset[len(train):len(myset) - n_lag], (len(predictions), 1))
    print('Train Score: %.3f RMSE' % trainScore)
    print('Test Score: %.3f RMSE' % testScore)
    print('Train Score: %.3f MAPE' % (trainScore_mape))
    print('Test Score: %.3f MAPE' % (testScore_mape))
    fig = px.line(train[1:len(train), -1], color_discrete_sequence=["black"])
    fig.add_traces(list(px.line(PredictPlot, color_discrete_sequence=["black"]).select_traces()))
    fig.add_traces(list(px.line(trainPredictPlot, color_discrete_sequence=["#409a73"]).select_traces()))
    fig.add_traces(list(px.line(testPredictPlot, color_discrete_sequence=["#f34b3a"]).select_traces()))
    fig.update_layout(title= 'XGBoost model prediction',yaxis_title= stockname, xaxis_title = "N. of observation")
    fig.update_layout(plot_bgcolor="#E5E4E2")
    fig.update_layout(showlegend=False)
    fig.show()
def xgboost_reg(dataset):
    """Run walk-forward XGBoost validation on one close-price frame."""
    supervised = series_to_supervised(dataset, n_in=10)
    walk_forward_validation(supervised, 157, dataset.values, dataset.name)
# Run the XGBoost walk-forward validation on every stock/day slice.
for t in iccutxgbk:
    xgboost_reg(initial_close_cut_xgboost[t])
Train Score: 0.627 RMSE Test Score: 1.837 RMSE Train Score: 0.015 MAPE Test Score: 0.049 MAPE
Train Score: 0.759 RMSE Test Score: 1.945 RMSE Train Score: 0.020 MAPE Test Score: 0.053 MAPE
Train Score: 0.787 RMSE Test Score: 4.294 RMSE Train Score: 0.020 MAPE Test Score: 0.122 MAPE
Train Score: 0.993 RMSE Test Score: 3.873 RMSE Train Score: 0.026 MAPE Test Score: 0.112 MAPE
Train Score: 0.060 RMSE Test Score: 0.093 RMSE Train Score: 0.021 MAPE Test Score: 0.032 MAPE
Train Score: 0.083 RMSE Test Score: 1.494 RMSE Train Score: 0.029 MAPE Test Score: 0.594 MAPE
Train Score: 0.072 RMSE Test Score: 0.213 RMSE Train Score: 0.026 MAPE Test Score: 0.082 MAPE
Train Score: 0.081 RMSE Test Score: 1.027 RMSE Train Score: 0.029 MAPE Test Score: 0.421 MAPE
Train Score: 0.084 RMSE Test Score: 0.229 RMSE Train Score: 0.019 MAPE Test Score: 0.055 MAPE
Train Score: 0.082 RMSE Test Score: 0.208 RMSE Train Score: 0.019 MAPE Test Score: 0.053 MAPE
Train Score: 0.082 RMSE Test Score: 0.352 RMSE Train Score: 0.018 MAPE Test Score: 0.092 MAPE
Train Score: 0.087 RMSE Test Score: 0.611 RMSE Train Score: 0.022 MAPE Test Score: 0.148 MAPE
Train Score: 0.014 RMSE Test Score: 0.065 RMSE Train Score: 0.016 MAPE Test Score: 0.089 MAPE
Train Score: 0.014 RMSE Test Score: 0.130 RMSE Train Score: 0.017 MAPE Test Score: 0.130 MAPE
Train Score: 0.015 RMSE Test Score: 0.033 RMSE Train Score: 0.017 MAPE Test Score: 0.042 MAPE
Train Score: 0.014 RMSE Test Score: 0.180 RMSE Train Score: 0.016 MAPE Test Score: 0.247 MAPE
GRU
# load libraries
import pandas, time
import numpy as np
from keras.layers.recurrent import GRU
from keras.layers.core import Dense, Dropout
from tensorflow.keras.optimizers import RMSprop
from keras.models import Sequential
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
# Per-stock GRU scores, filled in by gru_reg() below.
gru_rmse_train = {}
gru_rmse_test = {}
gru_mape_train = {}
gru_mape_test = {}
from keras.models import clone_model
from keras.losses import MeanAbsolutePercentageError
def gru_reg(dataset, stockname):
    """Fit a single-layer GRU one-step-ahead forecaster on a univariate series.

    dataset   : single-column DataFrame/array of prices
    stockname : key under which scores are stored in the gru_* dictionaries

    Prints train/test RMSE and MAPE and renders a plotly prediction plot.
    """
    # normalize the dataset to [0, 1]
    scaler = MinMaxScaler(feature_range=(0, 1))
    dataset = scaler.fit_transform(dataset)
    # chronological 60/40 split into train and test sets
    train_size = int(len(dataset) * 0.6)
    train_dataset, test_dataset = dataset[0:train_size,:], dataset[train_size:len(dataset),:]
    # Window -> X timestep back
    step_back = 1
    X_train, Y_train = [], []
    for i in range(len(train_dataset)-step_back - 1):
        a = train_dataset[i:(i+step_back), 0]
        X_train.append(a)
        Y_train.append(train_dataset[i + step_back, 0])
    X_train = np.array(X_train); Y_train = np.array(Y_train);
    X_test, Y_test = [], []
    for i in range(len(test_dataset)-step_back - 1):
        a = test_dataset[i:(i+step_back), 0]
        X_test.append(a)
        Y_test.append(test_dataset[i + step_back, 0])
    X_test = np.array(X_test); Y_test = np.array(Y_test);
    # reshape to (samples, time steps, features) as expected by the GRU layer
    X_train = np.reshape(X_train, (X_train.shape[0], 1, X_train.shape[1]))
    X_test = np.reshape(X_test, (X_test.shape[0], 1, X_test.shape[1]))
    model = Sequential()
    model.add(GRU(32, input_shape=(1, step_back)))
    model.add(Dense(units = 512, activation = 'relu'))
    model.add(Dropout(0.2))
    model.add(Dense(units = 1, activation = 'linear'))
    model.compile(loss='mean_squared_error', optimizer=RMSprop(lr = 0.01))
    model.fit(X_train, Y_train, epochs=20, batch_size=2, verbose=0)
    # Evaluate the skill of the trained model on the original scale
    trainPredict = model.predict(X_train)
    testPredict = model.predict(X_test)
    # invert predictions
    trainPredict = scaler.inverse_transform(trainPredict)
    testPredict = scaler.inverse_transform(testPredict)
    # BUG FIX: reshape with -1 instead of the hard-coded lengths 233/155 so
    # series of any length work.
    Y_train = scaler.inverse_transform(Y_train.reshape(-1, 1))
    Y_test = scaler.inverse_transform(Y_test.reshape(-1, 1))
    # BUG FIX: scores are labelled RMSE but were computed as sqrt(MAE);
    # use mean_squared_error like the CNN/XGBoost/RF models in this notebook.
    trainScore = sqrt(mean_squared_error(Y_train, trainPredict))
    testScore = sqrt(mean_squared_error(Y_test, testPredict))
    trainScore_mape = mean_absolute_percentage_error(Y_train, trainPredict)*100
    testScore_mape = mean_absolute_percentage_error(Y_test, testPredict)*100
    print('Train Score: %.3f RMSE' % trainScore)
    print('Test Score: %.3f RMSE' % testScore)
    print('Train Score: %.3f MAPE' % (trainScore_mape))
    print('Test Score: %.3f MAPE' % (testScore_mape))
    gru_rmse_train[stockname] = trainScore
    gru_rmse_test[stockname] = testScore
    gru_mape_train[stockname] = trainScore_mape
    gru_mape_test[stockname] = testScore_mape
    # shift train predictions for plotting
    trainPredictPlot = np.empty_like(dataset)
    trainPredictPlot[:, :] = np.nan
    trainPredictPlot[step_back:len(trainPredict)+step_back, :] = trainPredict
    # shift test predictions for plotting
    testPredictPlot = np.empty_like(dataset)
    testPredictPlot[:, :] = np.nan
    testPredictPlot[len(trainPredict)+(step_back*2)+1:len(dataset)-1, :] = testPredict
    fig = px.line(scaler.inverse_transform(dataset), color_discrete_sequence=["black"])
    fig.add_traces(list(px.line(trainPredictPlot, color_discrete_sequence=["#409a73"]).select_traces()))
    fig.add_traces(list(px.line(testPredictPlot, color_discrete_sequence=["#f34b3a"]).select_traces()))
    fig.update_layout(title= 'GRU model prediction',yaxis_title= stockname, xaxis_title = "N. of observation")
    fig.update_layout(plot_bgcolor="#E5E4E2")
    fig.update_layout(showlegend=False)
    fig.show()
# Train/evaluate the GRU model on every stock/day close-price slice.
for t in iccutxgbk:
    gru_reg(initial_close_cut_xgboost[t], t)
Train Score: 1.314 RMSE Test Score: 1.164 RMSE Train Score: 0.060 MAPE Test Score: 0.047 MAPE
Train Score: 1.650 RMSE Test Score: 1.385 RMSE Train Score: 0.096 MAPE Test Score: 0.068 MAPE
Train Score: 1.594 RMSE Test Score: 1.576 RMSE Train Score: 0.092 MAPE Test Score: 0.090 MAPE
Train Score: 1.484 RMSE Test Score: 1.708 RMSE Train Score: 0.081 MAPE Test Score: 0.107 MAPE
Train Score: 0.403 RMSE Test Score: 0.271 RMSE Train Score: 0.074 MAPE Test Score: 0.034 MAPE
Train Score: 0.452 RMSE Test Score: 0.855 RMSE Train Score: 0.095 MAPE Test Score: 0.345 MAPE
Train Score: 0.638 RMSE Test Score: 0.397 RMSE Train Score: 0.197 MAPE Test Score: 0.077 MAPE
Train Score: 0.457 RMSE Test Score: 0.815 RMSE Train Score: 0.104 MAPE Test Score: 0.326 MAPE
Train Score: 0.624 RMSE Test Score: 0.529 RMSE Train Score: 0.119 MAPE Test Score: 0.085 MAPE
Train Score: 0.592 RMSE Test Score: 0.410 RMSE Train Score: 0.107 MAPE Test Score: 0.052 MAPE
Train Score: 0.554 RMSE Test Score: 0.665 RMSE Train Score: 0.096 MAPE Test Score: 0.140 MAPE
Train Score: 0.573 RMSE Test Score: 0.568 RMSE Train Score: 0.106 MAPE Test Score: 0.103 MAPE
Train Score: 0.234 RMSE Test Score: 0.340 RMSE Train Score: 0.088 MAPE Test Score: 0.185 MAPE
Train Score: 0.259 RMSE Test Score: 0.176 RMSE Train Score: 0.106 MAPE Test Score: 0.049 MAPE
Train Score: 0.244 RMSE Test Score: 0.223 RMSE Train Score: 0.095 MAPE Test Score: 0.079 MAPE
Train Score: 0.181 RMSE Test Score: 0.268 RMSE Train Score: 0.052 MAPE Test Score: 0.113 MAPE
Gradient Boosting
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from lightgbm import LGBMRegressor
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.graphics.tsaplots import plot_acf
from scipy.stats import t
from sklearn.model_selection import TimeSeriesSplit, train_test_split
from sklearn.metrics import mean_absolute_error
# Per-stock Gradient Boosting scores, filled in by gradient_boost_reg() below.
gb_rmse_train = {}
gb_rmse_test = {}
gb_mape_train = {}
gb_mape_test = {}
def gradient_boost_reg(dataset, stockname):
    """Fit a LightGBM regressor predicting 'open' from the other price columns.

    dataset   : OHLC DataFrame; the first 392 rows / first 3 columns are used
    stockname : key under which scores are stored in the gb_* dictionaries

    Prints train/test RMSE and MAPE and renders a plotly prediction plot.
    """
    df = pandas.DataFrame(dataset.iloc[0:392,:3])
    # hold out the trailing 40% (+1 row) of the slice for validation
    horizon=int(len(df)*0.4)+1
    X = df.drop('open', axis=1)
    y = df['open']
    X_train, X_test = X.iloc[:-horizon,:], X.iloc[-horizon:,:]
    y_train, y_test = y.iloc[:-horizon], y.iloc[-horizon:]
    # create, train and do inference of the model
    model = LGBMRegressor(random_state=7)
    # BUG FIX: the model was fit a second time on the same data right after
    # predicting; a single fit suffices and the predictions are unchanged.
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    predictions_train = model.predict(X_train)
    # BUG FIX: scores are labelled RMSE but were computed as sqrt(MAE);
    # use mean_squared_error like the other models in this notebook.
    trainScore = sqrt(mean_squared_error(y_train, predictions_train))
    testScore = sqrt(mean_squared_error(y_test, predictions))
    trainScore_mape = mean_absolute_percentage_error(y_train, predictions_train)*100
    testScore_mape = mean_absolute_percentage_error(y_test, predictions)*100
    gb_rmse_train[stockname] = trainScore
    gb_rmse_test[stockname] = testScore
    gb_mape_train[stockname] = trainScore_mape
    gb_mape_test[stockname] = testScore_mape
    print('Train Score: %.3f RMSE' % trainScore)
    print('Test Score: %.3f RMSE' % testScore)
    print('Train Score: %.3f MAPE' % (trainScore_mape))
    print('Test Score: %.3f MAPE' % (testScore_mape))
    # Plot arrays aligned to the slice; lengths derived instead of hard-coded 157.
    testPlot = np.empty_like(df)
    testPlot[:, :] = np.nan
    testPlot[len(y_train):len(df), :] = numpy.reshape(numpy.array(y_test), (-1, 1))
    testPredictPlot = np.empty_like(df)
    testPredictPlot[:, :] = np.nan
    testPredictPlot[len(y_train):len(df), :] = numpy.reshape(predictions, (-1, 1))
    # plot reality vs prediction for the held-out part of the slice
    fig = px.line(testPlot, color_discrete_sequence=["black"])
    fig.add_traces(list(px.line(numpy.array(y_train), color_discrete_sequence=["black"]).select_traces()))
    fig.add_traces(list(px.line(predictions_train, color_discrete_sequence=["#409a73"]).select_traces()))
    fig.add_traces(list(px.line(testPredictPlot, color_discrete_sequence=["#f34b3a"]).select_traces()))
    fig.update_layout(title= 'Gradient Boosting model prediction',yaxis_title= stockname, xaxis_title = "N. of observation")
    fig.update_layout(plot_bgcolor="#E5E4E2")
    fig.update_layout(showlegend=False)
    fig.show()
# Gradient-boosting runs for the first group of four ARIMA-cut slices
# (iarimak_cut / initial_arima_cut are defined earlier in the notebook).
for t in iarimak_cut[0:4]:
    gradient_boost_reg(initial_arima_cut[t], t)
Train Score: 1.223 RMSE Test Score: 1.216 RMSE Train Score: 0.052 MAPE Test Score: 0.052 MAPE
Train Score: 0.795 RMSE Test Score: 1.108 RMSE Train Score: 0.022 MAPE Test Score: 0.043 MAPE
Train Score: 1.061 RMSE Test Score: 1.452 RMSE Train Score: 0.041 MAPE Test Score: 0.077 MAPE
Train Score: 0.919 RMSE Test Score: 1.111 RMSE Train Score: 0.031 MAPE Test Score: 0.045 MAPE
# Gradient-boosting runs for the second group of four ARIMA-cut slices.
for t in iarimak_cut[4:8]:
    gradient_boost_reg(initial_arima_cut[t], t)
Train Score: 0.305 RMSE Test Score: 0.207 RMSE Train Score: 0.043 MAPE Test Score: 0.020 MAPE
Train Score: 0.305 RMSE Test Score: 1.104 RMSE Train Score: 0.043 MAPE Test Score: 0.576 MAPE
Train Score: 0.278 RMSE Test Score: 0.290 RMSE Train Score: 0.038 MAPE Test Score: 0.041 MAPE
Train Score: 0.325 RMSE Test Score: 0.725 RMSE Train Score: 0.052 MAPE Test Score: 0.257 MAPE
# Gradient-boosting runs for the third group of four ARIMA-cut slices.
for t in iarimak_cut[8:12]:
    gradient_boost_reg(initial_arima_cut[t], t)
Train Score: 0.339 RMSE Test Score: 0.275 RMSE Train Score: 0.035 MAPE Test Score: 0.023 MAPE
Train Score: 0.329 RMSE Test Score: 0.334 RMSE Train Score: 0.033 MAPE Test Score: 0.034 MAPE
Train Score: 0.324 RMSE Test Score: 0.500 RMSE Train Score: 0.033 MAPE Test Score: 0.079 MAPE
Train Score: 0.359 RMSE Test Score: 0.689 RMSE Train Score: 0.041 MAPE Test Score: 0.151 MAPE
# Gradient-boosting runs for the last group of four ARIMA-cut slices.
for t in iarimak_cut[12:16]:
    gradient_boost_reg(initial_arima_cut[t], t)
Train Score: 0.134 RMSE Test Score: 0.173 RMSE Train Score: 0.029 MAPE Test Score: 0.048 MAPE
Train Score: 0.149 RMSE Test Score: 0.254 RMSE Train Score: 0.035 MAPE Test Score: 0.103 MAPE
Train Score: 0.141 RMSE Test Score: 0.119 RMSE Train Score: 0.032 MAPE Test Score: 0.022 MAPE
Train Score: 0.119 RMSE Test Score: 0.322 RMSE Train Score: 0.022 MAPE Test Score: 0.164 MAPE
Random Forest
from numpy import asarray
from pandas import read_csv
from pandas import DataFrame
from pandas import concat
from sklearn.metrics import mean_absolute_error
from sklearn.ensemble import RandomForestRegressor
from matplotlib import pyplot
# Per-stock Random Forest scores, filled in by walk_forward_validation_rf() below.
rf_rmse_train = {}
rf_rmse_test = {}
rf_mape_train = {}
rf_mape_test = {}
# split a univariate dataset into train/test sets
# (re-definition identical to the one in the XGBoost section)
def train_test_split(data, n_test):
    """Return (train, test): test is the trailing n_test rows of *data*."""
    cut = -n_test
    return data[:cut, :], data[cut:, :]
# walk-forward validation for univariate data
def walk_forward_validation_rf(data, n_test, myset, stockname):
    """Walk-forward one-step Random Forest validation on a supervised dataset.

    data      : 2-D array from series_to_supervised (last column = target)
    n_test    : number of trailing rows held out for testing
    myset     : original series values (2-D), used only for plotting
    stockname : key under which scores are stored in the rf_* dictionaries
    """
    predictions = list()
    # split dataset
    train, test = train_test_split(data, n_test)
    # seed history with training dataset
    history = [x for x in train]
    # step over each time-step in the test set
    for i in range(len(test)):
        # split test row into input and output columns
        testX, testy = test[i, :-1], test[i, -1]
        # fit model on history and make a prediction
        yhat = random_forest_forecast(history, testX)
        # store forecast in list of predictions
        predictions.append(yhat)
        # add actual observation to history for the next loop
        history.append(test[i])
    # in-sample predictions over an expanding training history
    predictions_train = list()
    history_train = list(train)
    for i in range(len(train)):
        trainX, trainy = train[i, :-1], train[i, -1]
        # fit model on history and make a prediction
        yhat_train = random_forest_forecast(history_train, trainX)
        predictions_train.append(yhat_train)
        # add actual observation to history for the next loop
        history_train.append(train[i])
    # first in-sample prediction is skipped (made before any history update)
    trainScore = sqrt(mean_squared_error(train[1:len(train), -1], numpy.array(predictions_train[1:len(predictions_train)])))
    testScore = sqrt(mean_squared_error(test[:, -1], predictions))
    trainScore_mape = mean_absolute_percentage_error(train[1:len(train), -1], numpy.array(predictions_train[1:len(predictions_train)]))*100
    testScore_mape = mean_absolute_percentage_error(test[:, -1], predictions)*100
    rf_rmse_train[stockname] = trainScore
    rf_rmse_test[stockname] = testScore
    rf_mape_train[stockname] = trainScore_mape
    rf_mape_test[stockname] = testScore_mape
    # Plot arrays aligned to the raw series; offsets derived from the data
    # instead of the hard-coded 157/225/10.
    n_lag = len(myset) - len(data)      # rows consumed by the lag window
    test_start = len(train) + n_lag
    testPredictPlot = numpy.empty_like(myset)
    testPredictPlot[:, :] = numpy.nan
    testPredictPlot[test_start:len(myset), :] = numpy.reshape(predictions, (len(predictions), 1))
    trainPredictPlot = numpy.empty_like(myset)
    trainPredictPlot[:, :] = numpy.nan
    trainPredictPlot[0:len(predictions_train), :] = numpy.reshape(predictions_train, (len(predictions_train), 1))
    PredictPlot = numpy.empty_like(myset)
    PredictPlot[:, :] = numpy.nan
    PredictPlot[len(train):len(myset) - n_lag, :] = numpy.reshape(myset[len(train):len(myset) - n_lag], (len(predictions), 1))
    print('Train Score: %.3f RMSE' % trainScore)
    print('Test Score: %.3f RMSE' % testScore)
    print('Train Score: %.3f MAPE' % (trainScore_mape))
    print('Test Score: %.3f MAPE' % (testScore_mape))
    fig = px.line(train[1:len(train), -1], color_discrete_sequence=["black"])
    fig.add_traces(list(px.line(PredictPlot, color_discrete_sequence=["black"]).select_traces()))
    fig.add_traces(list(px.line(trainPredictPlot, color_discrete_sequence=["#409a73"]).select_traces()))
    fig.add_traces(list(px.line(testPredictPlot, color_discrete_sequence=["#f34b3a"]).select_traces()))
    fig.update_layout(title= 'Random Forest model prediction',yaxis_title= stockname, xaxis_title = "N. of observation")
    fig.update_layout(plot_bgcolor="#E5E4E2")
    fig.update_layout(showlegend=False)
    fig.show()
# fit an random forest model and make a one step prediction
def random_forest_forecast(train, testX):
    """Fit a RandomForestRegressor on *train* rows and predict one step.

    Each row of *train* is [features..., target]; returns a scalar forecast.
    """
    history = asarray(train)
    features, target = history[:, :-1], history[:, -1]
    forest = RandomForestRegressor(n_estimators=100)
    forest.fit(features, target)
    # single-row prediction for the held-out feature vector
    return forest.predict([testX])[0]
def rf_reg(dataset):
    """Run walk-forward Random Forest validation on one close-price frame."""
    supervised = series_to_supervised(dataset, n_in=10)
    walk_forward_validation_rf(supervised, 157, dataset.values, dataset.name)
# Run the Random Forest walk-forward validation on every stock/day slice.
for t in iccutxgbk:
    rf_reg(initial_close_cut_xgboost[t])
Train Score: 0.790 RMSE Test Score: 1.352 RMSE Train Score: 0.018 MAPE Test Score: 0.036 MAPE
Train Score: 0.862 RMSE Test Score: 1.510 RMSE Train Score: 0.022 MAPE Test Score: 0.040 MAPE
Train Score: 0.816 RMSE Test Score: 3.490 RMSE Train Score: 0.022 MAPE Test Score: 0.088 MAPE
Train Score: 1.004 RMSE Test Score: 2.412 RMSE Train Score: 0.027 MAPE Test Score: 0.067 MAPE
Train Score: 0.081 RMSE Test Score: 0.085 RMSE Train Score: 0.025 MAPE Test Score: 0.029 MAPE
Train Score: 0.086 RMSE Test Score: 0.158 RMSE Train Score: 0.030 MAPE Test Score: 0.059 MAPE
Train Score: 0.075 RMSE Test Score: 0.179 RMSE Train Score: 0.027 MAPE Test Score: 0.070 MAPE
Train Score: 0.068 RMSE Test Score: 0.177 RMSE Train Score: 0.025 MAPE Test Score: 0.068 MAPE
Train Score: 0.093 RMSE Test Score: 0.162 RMSE Train Score: 0.021 MAPE Test Score: 0.038 MAPE
Train Score: 0.089 RMSE Test Score: 0.160 RMSE Train Score: 0.020 MAPE Test Score: 0.038 MAPE
Train Score: 0.092 RMSE Test Score: 0.241 RMSE Train Score: 0.022 MAPE Test Score: 0.062 MAPE
Train Score: 0.091 RMSE Test Score: 0.233 RMSE Train Score: 0.023 MAPE Test Score: 0.059 MAPE
Train Score: 0.013 RMSE Test Score: 0.019 RMSE Train Score: 0.015 MAPE Test Score: 0.024 MAPE
Train Score: 0.013 RMSE Test Score: 0.032 RMSE Train Score: 0.016 MAPE Test Score: 0.037 MAPE
Train Score: 0.016 RMSE Test Score: 0.025 RMSE Train Score: 0.017 MAPE Test Score: 0.032 MAPE
Train Score: 0.012 RMSE Test Score: 0.025 RMSE Train Score: 0.015 MAPE Test Score: 0.031 MAPE
def get_nth_key(dictionary, n=0):
    """Return the n-th key of *dictionary* in insertion order.

    Negative *n* counts from the end; raises IndexError when out of range.
    """
    if n < 0:
        n += len(dictionary)
    for position, key in enumerate(dictionary):
        if position == n:
            return key
    raise IndexError("dictionary index out of range")
# Sanity check: the first stored key is the first stock/day processed.
get_nth_key(rf_rmse_train, 0)
'GOOG Stock Apr 4'
# Reorder the stock/day keys so results group by day rather than by ticker:
# indices day, day+4, day+8, day+12 pick one ticker per step for the same day.
# FIX: the exclusive stop 15 forced a special-case `if k == 3` branch for the
# last index; range(day, 16, 4) covers index 15 naturally and is equivalent.
keysright = {}
for day in range(0, 4):
    for idx in range(day, 16, 4):
        nkey = get_nth_key(rf_rmse_train, idx)
        keysright[nkey] = nkey
keysright
{'GOOG Stock Apr 4': 'GOOG Stock Apr 4',
'FDX Stock Apr 4': 'FDX Stock Apr 4',
'GS Stock Apr 4': 'GS Stock Apr 4',
'KO Stock Apr 4': 'KO Stock Apr 4',
'GOOG Stock Apr 5': 'GOOG Stock Apr 5',
'FDX Stock Apr 5': 'FDX Stock Apr 5',
'GS Stock Apr 5': 'GS Stock Apr 5',
'KO Stock Apr 5': 'KO Stock Apr 5',
'GOOG Stock Apr 6': 'GOOG Stock Apr 6',
'FDX Stock Apr 6': 'FDX Stock Apr 6',
'GS Stock Apr 6': 'GS Stock Apr 6',
'KO Stock Apr 6': 'KO Stock Apr 6',
'GOOG Stock Apr 7': 'GOOG Stock Apr 7',
'FDX Stock Apr 7': 'FDX Stock Apr 7',
'GS Stock Apr 7': 'GS Stock Apr 7',
'KO Stock Apr 7': 'KO Stock Apr 7'}
# Train-RMSE results of every model, keyed by model name.
dictsordered_rmse_train = {"LSTM":lstm_1_rmse_train,
'LSTM with Window Method': lstm_WM_rmse_train,
'LSTM with Time Steps': lstm_TS_rmse_train,
'LSTM with Memory Between Batches' :lstm_MBB_rmse_train,
'Stacked LSTM with Memory Between Batches' :lstm_SMBB_rmse_train,
'ARIMA': arima_rmse_train,
'CNN': cnn_rmse_train,
'GRU': gru_rmse_train,
'XGBoost': xgb_rmse_train,
'GB': gb_rmse_train,
'RF': rf_rmse_train}
# Re-key every model's scores into the day-grouped order defined by keysright.
dictsredordered_rmse_train = {model: {k: scores[k] for k in keysright}
                              for model, scores in dictsordered_rmse_train.items()}
dataframe_rmse_train = pandas.DataFrame.from_dict(dictsredordered_rmse_train, orient = "index")
# Test-RMSE results of every model, keyed by model name.
dictsordered_rmse_test = {"LSTM":lstm_1_rmse_test,
'LSTM with Window Method': lstm_WM_rmse_test,
'LSTM with Time Steps': lstm_TS_rmse_test,
'LSTM with Memory Between Batches' :lstm_MBB_rmse_test,
'Stacked LSTM with Memory Between Batches' :lstm_SMBB_rmse_test,
'ARIMA': arima_rmse_test,
'CNN': cnn_rmse_test,
'GRU': gru_rmse_test,
'XGBoost': xgb_rmse_test,
'GB': gb_rmse_test,
'RF': rf_rmse_test}
# Re-key every model's scores into the day-grouped order defined by keysright.
dictsredordered_rmse_test = {model: {k: scores[k] for k in keysright}
                             for model, scores in dictsordered_rmse_test.items()}
dataframe_rmse_test = pandas.DataFrame.from_dict(dictsredordered_rmse_test, orient = "index")
# Train-MAPE results of every model, keyed by model name.
dictsordered_mape_train = {"LSTM":lstm_1_mape_train,
'LSTM with Window Method': lstm_WM_mape_train,
'LSTM with Time Steps': lstm_TS_mape_train,
'LSTM with Memory Between Batches' :lstm_MBB_mape_train,
'Stacked LSTM with Memory Between Batches' :lstm_SMBB_mape_train,
'ARIMA': arima_mape_train,
'CNN': cnn_mape_train,
'GRU': gru_mape_train,
'XGBoost': xgb_mape_train,
'GB': gb_mape_train,
'RF': rf_mape_train}
# Re-key every model's scores into the day-grouped order defined by keysright.
dictsredordered_mape_train = {model: {k: scores[k] for k in keysright}
                              for model, scores in dictsordered_mape_train.items()}
dataframe_mape_train = pandas.DataFrame.from_dict(dictsredordered_mape_train, orient = "index")
# Test-MAPE results of every model, keyed by model name.
dictsordered_mape_test = {"LSTM":lstm_1_mape_test,
'LSTM with Window Method': lstm_WM_mape_test,
'LSTM with Time Steps': lstm_TS_mape_test,
'LSTM with Memory Between Batches' :lstm_MBB_mape_test,
'Stacked LSTM with Memory Between Batches' :lstm_SMBB_mape_test,
'ARIMA': arima_mape_test,
'CNN': cnn_mape_test,
'GRU': gru_mape_test,
'XGBoost': xgb_mape_test,
'GB': gb_mape_test,
'RF': rf_mape_test}
# Re-key every model's scores into the day-grouped order defined by keysright.
dictsredordered_mape_test = {model: {k: scores[k] for k in keysright}
                             for model, scores in dictsordered_mape_test.items()}
dataframe_mape_test = pandas.DataFrame.from_dict(dictsredordered_mape_test, orient = "index")
rmse_4 = pd.concat([dataframe_rmse_train.iloc[:, 0:4], dataframe_rmse_test.iloc[:, 0:4]], axis=0)
# Export one CSV per trading day (columns come in groups of four per day):
# train-score rows stacked on top of the matching test-score rows.
rmses = {}
for start in range(0, 16, 4):
    dname = "rmses_" + str(3 + (start + 4) // 4)
    rmses[dname] = pd.concat([dataframe_rmse_train.iloc[:, start:start + 4],
                              dataframe_rmse_test.iloc[:, start:start + 4]], axis=0)
    rmses[dname].to_csv(dname, index = True)
mapes = {}
for start in range(0, 16, 4):
    dname = "mapes_" + str(3 + (start + 4) // 4)
    mapes[dname] = pd.concat([dataframe_mape_train.iloc[:, start:start + 4],
                              dataframe_mape_test.iloc[:, start:start + 4]], axis=0)
    mapes[dname].to_csv(dname, index = True)